# ---------------------------------------------------------------------------------------------------- #
# PowerDistanceSystemLevelAnalysis01.R
# ---------------------------------------------------------------------------------------------------- #
#
# Date: 2012-07-20
# Authors: Jonathan Markowitz and Christopher Fariss
#
# Title: Going the Distance: The Price of Projecting Power
# Most recent version available at: http://ssrn.com/abstract=1961454
#
# All inquires about the models and code should be sent to Chris Fariss 
# Contact: cjf0006@gmail.com
#
# Copyright (c) 2012, under the Creative Commons Attribution-Noncommercial-Share Alike 3.0 United States License.
# For more information see: http://creativecommons.org/licenses/by-nc-sa/3.0/us/
# All rights reserved. 
#
# NOTE1: see the paper and PowerDistanceREADME.txt for information on data sources used in this project
#
# R code begins below
# ---------------------------------------------------------------------------------------------------- #

rm(list = ls())

# ---------------------------------------------------------------------------------------------------- #
# load packages
# ---------------------------------------------------------------------------------------------------- #
library(MASS); library(foreign); library(Zelig); library(lmtest); library(xtable); library(plotrix)

# ---------------------------------------------------------------------------------------------------- #
# load data 
# ---------------------------------------------------------------------------------------------------- #

# load Maoz MID data with latitute and longitue columns from Braithwaite
mid <- read.csv("mid_lat_long.csv")

# load capital latitude and longitude location data 
# state code = ccode 
cap.loc <- read.csv("latlong.csv")
er <- read.csv("er_data.csv")

# load capital latitude and longitude location data 
# state code = ccode 
state.count <- read.csv("state_count.csv")

# load polity data 
polity <- read.csv("polity4.csv",na.strings = c("-77", "-88", "-99", "NA"))


# load COW congtiguity data 
contiguity <- read.csv("contdird.csv")
contiguity <- subset(contiguity, select=c(state1no, state2no, year, conttype))


# ---------------------------------------------------------------------------------------------------- #
# merge data 
# ---------------------------------------------------------------------------------------------------- #
data.step.01 <- mid

names(cap.loc)<-c("ccode", "state.name.B", "latitude.cap.B", "longitude.cap.B")
data.step.02 <- as.data.frame(merge(data.step.01, cap.loc, by.x=c("STATEB"), by.y=c("ccode"), all.x=FALSE, all.y=FALSE))
nrow(data.step.02) # check record number after merge

names(cap.loc)<-c("ccode", "state.name.A", "latitude.cap.A", "longitude.cap.A")
data.step.03 <- as.data.frame(merge(data.step.02, cap.loc, by.x=c("STATEA"), by.y=c("ccode"), all.x=FALSE, all.y=FALSE))
nrow(data.step.03) # check record number after merge

data.step.04 <- as.data.frame(merge(data.step.03, contiguity, by.x=c("STATEA", "STATEB", "YEAR"), by.y=c("state1no", "state2no", "year"), all.x=TRUE, all.y=FALSE))
data.step.04$conttype[is.na(data.step.04$conttype)] <- 6 # not contiguous
nrow(data.step.04)

data.step.05 <- na.omit(data.step.04)
nrow(data.step.05)

data.final <- data.step.05



# ---------------------------------------------------------------------------------------------------- #
# generate geocoded distances between capital city locations and MID locations
# ---------------------------------------------------------------------------------------------------- #
R <- 6378.7 
lat2 <- data.final$latitude
lat1 <- data.final$latitude.cap.A
long2 <- data.final$longitude
long1 <- data.final$longitude.cap.A
distance.A <- R * acos(sin(lat1/57.2958) * sin(lat2/57.2958) + cos(lat1/57.2958) * cos(lat2/57.2958) *  cos(long2/57.2958 -long1/57.2958))

lat2 <- data.final$latitude
lat1 <- data.final$latitude.cap.B
long2 <- data.final$longitude
long1 <- data.final$longitude.cap.B
distance.B <- R * acos(sin(lat1/57.2958) * sin(lat2/57.2958) + cos(lat1/57.2958) * cos(lat2/57.2958) *  cos(long2/57.2958 -long1/57.2958))

lat2 <- data.final$latitude.cap.A
lat1 <- data.final$latitude.cap.B
long2 <- data.final$longitude.cap.A
long1 <- data.final$longitude.cap.B
distance.AB <- R * acos(sin(lat1/57.2958) * sin(lat2/57.2958) + cos(lat1/57.2958) * cos(lat2/57.2958) *  cos(long2/57.2958 -long1/57.2958))


# ---------------------------------------------------------------------------------------------------- #
# create new data frame with generated geocoded distances between capital city locations and MID locations
# ---------------------------------------------------------------------------------------------------- #
disno <- data.final$DISNO
year <- data.final$YEAR
statea <- data.final$STATEA
stateb <- data.final$STATEB
disno <- data.final$DISNO
fat.level <- data.final$FATLEV
mid.level <- data.final$HIHOST
state.name.A <- data.final$state.name.A
state.name.B <- data.final$state.name.B
conttype <- data.final$conttype

distance.max <- 0
distance.max[distance.A >= distance.B] <- distance.A[distance.A >= distance.B]
distance.max[distance.A < distance.B] <- distance.B[distance.A < distance.B]

d.data <- as.data.frame(cbind(disno, year, statea, stateb, distance.A, distance.B, distance.AB, distance.max, fat.level, mid.level, conttype))
nrow(d.data)


# ---------------------------------------------------------------------------------------------------- #
# calculate descriptive statistics for distance mean given level of contiguity from COW
# ---------------------------------------------------------------------------------------------------- #
mean(d.data$distance.AB) # all mids
mean(d.data$distance.AB[d.data$conttype==6]) # not contiguous
mean(d.data$distance.AB[d.data$conttype<=5]) # direct + 400 miles distant
mean(d.data$distance.AB[d.data$conttype<=4]) # direct + 150 miles distant
mean(d.data$distance.AB[d.data$conttype<=3]) # direct + 24 miles distant
mean(d.data$distance.AB[d.data$conttype<=2]) # direct + 12 miles distant
mean(d.data$distance.AB[d.data$conttype<=1]) # direct contiguity only 


# ---------------------------------------------------------------------------------------------------- #
# save this data if no file exists (this file is used in the next R script, which analysis distance of MID)
# ---------------------------------------------------------------------------------------------------- #
#write.csv(d.data, "mid_participants_distance_20100118.csv")
write.csv(d.data, "mid_participants_distance_20120627.csv")


# ---------------------------------------------------------------------------------------------------- #
# subset and aggregate data for analysis 
# ---------------------------------------------------------------------------------------------------- #
nrow(d.data)
av <- d.data

#av <- subset(av, fat.level>0) # comment out this line to include all MIDs; currently includes all MIDS with at least one fatality (i.e., an estimate of 1-25 fatalities).
av <- subset(av, mid.level>3) # comment out this line to include all MIDs; currently include all MIDS with a hostility value greater than 3.

d.data <- av
nrow(d.data)

av <- aggregate(av, by=list(av$year), FUN=mean, na.rm=TRUE)
av <- subset(av, select=c("year", "distance.max"))


# ---------------------------------------------------------------------------------------------------- #
# create system level dependent variables
# ---------------------------------------------------------------------------------------------------- #

EXCLUDE <- FALSE
# NOTE: exclude contiguous MIDs from analysis?
if(EXCLUDE==TRUE)TEST <- ifelse(d.data$conttype>=6,1,0) # excluce 
if(EXCLUDE==FALSE)TEST <- ifelse(d.data$conttype>=1,1,0) # not exclude

xmax <- as.data.frame(table(d.data$year[d.data$year>=1869 & d.data$year<=1937 & TEST==1]))
names(xmax)[1] <- "year"

x6000 <- as.data.frame(table(d.data$year[d.data$distance.max<6000 & d.data$year>=1869 & d.data$year<=1937 & TEST==1]))
names(x6000)[1] <- "year"

x5000 <- as.data.frame(table(d.data$year[d.data$distance.max<5000 & d.data$year>=1869 & d.data$year<=1937 & TEST==1]))
names(x5000)[1] <- "year"

x4000 <- as.data.frame(table(d.data$year[d.data$distance.max<4000 & d.data$year>=1869 & d.data$year<=1937 & TEST==1]))
names(x4000)[1] <- "year"

x3000 <- as.data.frame(table(d.data$year[d.data$distance.max<3000 & d.data$year>=1869 & d.data$year<=1937 & TEST==1]))
names(x3000)[1] <- "year"

x2000 <- as.data.frame(table(d.data$year[d.data$distance.max<2000 & d.data$year>=1869 & d.data$year<=1937 & TEST==1]))
names(x2000)[1] <- "year"

x1000 <- as.data.frame(table(d.data$year[d.data$distance.max<1000 & d.data$year>=1869 & d.data$year<=1937 & TEST==1]))
names(x1000)[1] <- "year"

x0500 <- as.data.frame(table(d.data$year[d.data$distance.max<500 & d.data$year>=1869 & d.data$year<=1937 & TEST==1]))
names(x0500)[1] <- "year"


# ---------------------------------------------------------------------------------------------------- #
# bind dependent variables in to data set d 
# ---------------------------------------------------------------------------------------------------- #
x <- merge(xmax,x6000, by="year", all=TRUE)
names(x) <- c("year", "xmax", "x6000")

x <- merge(x,x5000, by="year", all=TRUE)
names(x) <- c("year", "xmax", "x5000")

x <- merge(x,x4000, by="year", all=TRUE)
names(x) <- c("year", "xmax", "x6000", "x5000", "x4000")

x <- merge(x,x3000, by="year", all=TRUE)
names(x) <- c("year", "xmax", "x6000", "x5000", "x4000", "x3000")

x <- merge(x,x2000, by="year", all=TRUE)
names(x) <- c("year", "xmax", "x6000", "x5000", "x4000", "x3000", "x2000")

x <- merge(x,x1000, by="year", all=TRUE)
names(x) <- c("year", "xmax", "x6000", "x5000", "x4000", "x3000", "x2000", "x1000")

x <- merge(x,x1000, by="year", all=TRUE)
names(x) <- c("year", "xmax", "x6000", "x5000", "x4000", "x3000", "x2000", "x1000", "x0500")

year <- seq(from=1869, to=1936, by=1)
year <- as.data.frame(year)
names(year)[1] <- "year"

x <- merge(year, x, by="year", all=TRUE)
s <- subset(er, year>=1869 & year<=1936, select=c(year, Tramp, RailCost, Indus, Power))


# ---------------------------------------------------------------------------------------------------- #
# create system democracy score
# ---------------------------------------------------------------------------------------------------- #
dem <- ifelse(polity$POLITY2>=7,1,0)
dem.percent <- as.numeric(cbind(table(dem, polity$YEAR)[2,]/(table(dem, polity$YEAR)[1,] + table(dem, polity$YEAR)[2,])))
year <- 1800:2007
dem.data <- cbind(year, dem.percent)


# ---------------------------------------------------------------------------------------------------- #
# merge all the data 
# ---------------------------------------------------------------------------------------------------- #
d <- merge(x,s, by="year", all.x=TRUE)
d <- merge(d, av, by="year", all.x=TRUE)
d <- merge(d, state.count, by="year", all.x=TRUE)
d <- merge(d, dem.data, by="year", all.x=TRUE)

d$xmax[is.na(d$xmax)]<-0
d$x5000[is.na(d$x6000)]<-0
d$x5000[is.na(d$x5000)]<-0
d$x4000[is.na(d$x4000)]<-0
d$x3000[is.na(d$x3000)]<-0
d$x2000[is.na(d$x2000)]<-0
d$x1000[is.na(d$x1000)]<-0
d$x0500[is.na(d$x0500)]<-0
d$distance.max[is.na(d$distance.max)]<-0


# ---------------------------------------------------------------------------------------------------- #
# descriptive statistics of dependent variables 
# ---------------------------------------------------------------------------------------------------- #

# these are MID counts by distance cohort 
summary(d$xmax); summary(d$x6000); summary(d$x5000); summary(d$x4000); summary(d$x3000); summary(d$x2000); summary(d$x1000); summary(d$x0500)

# this is the maximal MID distance for a given system year  
summary(d$distance.max)


# ---------------------------------------------------------------------------------------------------- #
# PLOTS
# ---------------------------------------------------------------------------------------------------- #

# great circle distance plot (presented in an earlier version of the paper)
plot(0,0, ylim=c(-21,21), xlim=c(-21,21), type="n",main="Power Projection",xlab="", ylab="", xaxt="n",  yaxt="n", cex.lab=1.5)
draw.circle(0,0,1,border="grey70",lty=1,lwd=3)
draw.circle(0,0,5,border="grey70",lty=1,lwd=3)
draw.circle(0,0,10,border="grey70",lty=1,lwd=3)
draw.circle(0,0,15,border="grey70",lty=1,lwd=3)
draw.circle(0,0,20,border="grey70",lty=1,lwd=3)
text(x=1.25, y=1.25, labels="1,000 km")
text(x=4.75, y=4.75, labels="2,000 km")
text(x=9, y=9, labels="3,000 km")
text(x=12.5, y=12.5, labels="5,000 km")
text(x=16, y=16, labels="MAX km")

# generate individual plots (presented in an earlier version of the paper)
par(mfrow=c(1,1))
plot(table(d.data$year[d.data$distance.max>0 & d.data$year>1869 & d.data$year<1937]), ylim=c(0,100), xlim=c(1869,1940), main="Militarized Interstate Disputes \n in the International System over Time", ylab="MID count", xlab="year")
plot(er$year, er$Tramp, ylim=c(0,800), xlim=c(1869,1950), ylab="Tramp", xlab="", main="Shipping Cost Over Time" , pch=20, col=1, type="l", cex.main=2, lwd=3)
plot(er$year, er$RailCost, ylim=c(0,1.5), xlim=c(1850,1950), ylab="Rail Cost", xlab="", main="", pch=20, col=1, type="l", lwd=2)

# generate combined plot (presented in the current version of the paper)
par(mfrow=c(3,1), cex=1.3, mai=c(0,0,0,0),omi=c(0.6,0.8,0.05,0.8), font=2, font.lab=2)
plot(table(d.data$year[d.data$distance.max>0 & d.data$year>1869 & d.data$year<1937]), ylim=c(0,90), xlim=c(1870,1936), main="", ylab="MID count", xlab="", xaxt="n", yaxt="n")
text(1870, 75, "Militarized Interstate Disputes", pos=4)
axis(side=2, at=c(0,20,40,60,80,100), las=2)

plot(er$year, er$Tramp, ylim=c(0,800), xlim=c(1870,1937), ylab="", xlab="", main="" , pch=20, col=1, type="l", xaxt="n", yaxt="n", lwd=3)
text(1870, 650, "Tramp Shipping", pos=4)
axis(side=4, at=c(0,200,400,600,800), las=2)

plot(er$year, er$RailCost, ylim=c(0,1.5), xlim=c(1870,1937), ylab="", xlab="", main="", pch=20, col=1, type="l", yaxt="n", lwd=3)
text(1870, 1.15, "Rail Cost", pos=4)
axis(side=2, at=c(0,0.4,0.8,1.2,1.6), las=2)


# ---------------------------------------------------------------------------------------------------- #
# generate lagged dependent variables 
# ---------------------------------------------------------------------------------------------------- #
d$xmax.lag <- c(NA, d$xmax[1:(length(d$xmax)-1)])
d$x6000.lag <- c(NA, d$x6000[1:(length(d$x6000)-1)])
d$x5000.lag <- c(NA, d$x5000[1:(length(d$x5000)-1)])
d$x4000.lag <- c(NA, d$x4000[1:(length(d$x4000)-1)])
d$x3000.lag <- c(NA, d$x3000[1:(length(d$x3000)-1)])
d$x2000.lag <- c(NA, d$x2000[1:(length(d$x2000)-1)])
d$x1000.lag <- c(NA, d$x1000[1:(length(d$x1000)-1)])
d$x0500.lag <- c(NA, d$x0500[1:(length(d$x0500)-1)])
d$distance.max.lag <- c(NA, d$distance.max[1:(length(d$distance.max)-1)])

# generate lagged independent variables
d$Tramp.lag <- c(NA, d$Tramp[1:(length(d$Tramp)-1)])
d$RailCost.lag <- c(NA, d$RailCost[1:(length(d$RailCost)-1)])
d$Indus.lag <- c(NA, d$Indus[1:(length(d$Indus)-1)])
d$Power.lag <- c(NA, d$Power[1:(length(d$Power)-1)])

# Generate outer circle count dependent variables and lagged variables
d$outer.0500 <- d$xmax - d$x0500
d$outer.0500.lag <- d$xmax.lag - d$x0500.lag

d$outer.1000 <- d$xmax - d$x1000
d$outer.1000.lag <- d$xmax.lag - d$x1000.lag

d$outer.2000 <- d$xmax - d$x2000
d$outer.2000.lag <- d$xmax.lag - d$x2000.lag

d$outer.3000 <- d$xmax - d$x3000
d$outer.3000.lag <- d$xmax.lag - d$x3000.lag

d$outer.4000 <- d$xmax - d$x4000
d$outer.4000.lag <- d$xmax.lag - d$x4000.lag

d$outer.5000 <- d$xmax - d$x5000
d$outer.5000.lag <- d$xmax.lag - d$x5000.lag

d$outer.6000 <- d$xmax - d$x6000
d$outer.6000.lag <- d$xmax.lag - d$x6000.lag


# ---------------------------------------------------------------------------------------------------- #
# NOTE: uncomment to subset out WWI for Robustness Test
# d <- subset(d, year!=1914 & year!=1915 & year!=1916 & year!=1917 & year!=1918 & year!=1919)

# ---------------------------------------------------------------------------------------------------- #
# (formerly Table 1 but we report the estimation of this model in the main text of the paper)
# estimate poisson regression models using the Zelig package 
# ---------------------------------------------------------------------------------------------------- #
model.max <- zelig(xmax ~ xmax.lag + log(Tramp.lag) + Indus.lag + state.count + dem.percent, model="poisson", data=d, robust=TRUE)

# model summaries
summary(model.max)

# create LaTeX tables with the xtable package
#xtable(model.max)

# ---------------------------------------------------------------------------------------------------- #
# generate quantities of interest after Model with Zelig. see King, Tomz and Wittenberg (2000)
# ---------------------------------------------------------------------------------------------------- #
max.mean <- exp(mean(log(d$Tramp.lag[!is.na(d$Tramp.lag)])))
max.sd.l <- exp(mean(log(d$Tramp.lag[!is.na(d$Tramp.lag)])) - sd(log(d$Tramp.lag[!is.na(d$Tramp.lag)])))
max.sd.h <- exp(mean(log(d$Tramp.lag[!is.na(d$Tramp.lag)])) + sd(log(d$Tramp.lag[!is.na(d$Tramp.lag)])))

# prediction with shipping at its mean value
out <- setx(model.max, Tramp.lag=max.mean) 
sout <- sim(model.max,x=out) 
summary(sout)

# prediction with shipping one standard deviation below its mean 
out <- setx(model.max, Tramp.lag=max.sd.l) 
sout <- sim(model.max,x=out) 
summary(sout)

# prediction with shipping one standard deviation above its mean
out <- setx(model.max, Tramp.lag=max.sd.h) 
sout <- sim(model.max,x=out) 
summary(sout)

# ---------------------------------------------------------------------------------------------------- #
# set PLOT to TRUE to generate quantity of interest plots. 
# the predicted values are based on simulations so the plot takes a while
# ---------------------------------------------------------------------------------------------------- #
PLOT <- FALSE
if(PLOT==TRUE){

# create vectors to hold predicted values
v.ev <- v.ev.l <- v.ev.h <- NA

#Tramp.seq <- seq(from=min(d$Tramp.lag[!is.na(d$Tramp.lag)]), to=max(d$Tramp.lag[!is.na(d$Tramp.lag)]), by=1)
Tramp.seq <- seq(from=1, to=800, by=1)

for(i in 1: length(Tramp.seq)){
	out <- setx(model.max, Tramp.lag=Tramp.seq[i]) 
	sout <- sim(model.max,x=out) 

	v.ev[i] <- mean(sout$qi$ev)
	v.ev.l[i] <- quantile(sout$qi$ev, probs=c(0.025))
	v.ev.h[i] <- quantile(sout$qi$ev, probs=c(0.975))
} ## end for loop

par(mfrow=c(1,1), mar=c(5,4,4,4), font=2, font.lab=2)
plot(Tramp.seq, v.ev, ylim=c(0,12), xlim=c(27,740), main="Substantive Effect of Tramp on the Number of Militarized \n Interstate Disputes in the International System 1870-1936", ylab="Expected Number of MIDs", xlab="Tramp Values from Min=45 to Max=751", type="n", xaxt="n", yaxt="n")
axis(side=1, at=c(0,100,200,300,400,500,600,700), las=1, font=2)
axis(side=2, at=c(0,2,4,6,8,10,12), las=2, font=2)

low <- lowess(Tramp.seq, v.ev.l, f=.05)
high <- lowess(Tramp.seq, v.ev.h, f=.05)

for(i in 1:length(Tramp.seq)){
	lines(c(low$x[i],high$x[i]), c(low$y[i],high$y[i]), col="grey70", lwd=2)
}
lines(lowess(Tramp.seq, v.ev, f=.05), lwd=3, col="darkblue")
lines(lowess(Tramp.seq, v.ev.h, f=.05), lwd=2, col="grey70")
lines(lowess(Tramp.seq, v.ev.l, f=.05), lwd=2, col="grey70")
rug(d$Tramp.lag, ticksize = 0.05, side = 1, lwd = 0.5, col=4)
} # end if statement and plot code 

# ---------------------------------------------------------------------------------------------------- #
# Table 1 (formerly Table 2)
# estimate negative binomial regression models using the Zelig package
# ---------------------------------------------------------------------------------------------------- #
model.max <- zelig(xmax ~ xmax.lag + log(Tramp.lag) + Indus.lag + state.count + dem.percent, model="negbin", data=d, robust=TRUE)

# model summaries 
summary(model.max)

# create LaTeX tables with the xtable package 
#xtable(model.max)

# ---------------------------------------------------------------------------------------------------- #
# generate quantities of interest after Model with Zelig. see King, Tomz and Wittenberg (2000)
# ---------------------------------------------------------------------------------------------------- #
max.mean <- exp(mean(log(d$Tramp.lag[!is.na(d$Tramp.lag)])))
max.sd.l <- exp(mean(log(d$Tramp.lag[!is.na(d$Tramp.lag)])) - sd(log(d$Tramp.lag[!is.na(d$Tramp.lag)])))
max.sd.h <- exp(mean(log(d$Tramp.lag[!is.na(d$Tramp.lag)])) + sd(log(d$Tramp.lag[!is.na(d$Tramp.lag)])))

# prediction with shipping at its mean value
out <- setx(model.max, Tramp.lag=max.mean) 
sout <- sim(model.max,x=out) 
summary(sout)

# prediction with shipping one standard deviation below its mean 
out <- setx(model.max, Tramp.lag=max.sd.l) 
sout <- sim(model.max,x=out) 
summary(sout)

# prediction with shipping one standard deviation above its mean
out <- setx(model.max, Tramp.lag=max.sd.h) 
sout <- sim(model.max,x=out) 
summary(sout)

# ---------------------------------------------------------------------------------------------------- #
# set PLOT to TRUE to generate quantity of interest plots. 
# the predicted values are based on simulations so the plot takes a while
# ---------------------------------------------------------------------------------------------------- #
PLOT <- FALSE
if(PLOT==TRUE){

# create vectors to hold predicted values
v.ev <- v.ev.l <- v.ev.h <- NA

#Tramp.seq <- seq(from=min(d$Tramp.lag[!is.na(d$Tramp.lag)]), to=max(d$Tramp.lag[!is.na(d$Tramp.lag)]), by=1)
Tramp.seq <- seq(from=1, to=800, by=1)

for(i in 1: length(Tramp.seq)){
	out <- setx(model.max, Tramp.lag=Tramp.seq[i]) 
	sout <- sim(model.max,x=out) 

	v.ev[i] <- mean(sout$qi$ev)
	v.ev.l[i] <- quantile(sout$qi$ev, probs=c(0.025))
	v.ev.h[i] <- quantile(sout$qi$ev, probs=c(0.975))
} ## end for loop

par(mfrow=c(1,1), mar=c(5,4,4,4), font=2, font.lab=2)
plot(Tramp.seq, v.ev, ylim=c(0,12), xlim=c(27,740), main="Substantive Effect of Tramp on the Number of Militarized \n Interstate Disputes in the International System 1870-1936", ylab="Expected Number of MIDs", xlab="Tramp Values from Min=45 to Max=751", type="n", xaxt="n", yaxt="n")
axis(side=1, at=c(0,100,200,300,400,500,600,700), las=1, font=2)
axis(side=2, at=c(0,2,4,6,8,10,12), las=2, font=2)

low <- lowess(Tramp.seq, v.ev.l, f=.05)
high <- lowess(Tramp.seq, v.ev.h, f=.05)

for(i in 1:length(Tramp.seq)){
	lines(c(low$x[i],high$x[i]), c(low$y[i],high$y[i]), col="grey70", lwd=2)
}
lines(lowess(Tramp.seq, v.ev, f=.05), lwd=3, col="darkblue")
lines(lowess(Tramp.seq, v.ev.h, f=.05), lwd=2, col="grey70")
lines(lowess(Tramp.seq, v.ev.l, f=.05), lwd=2, col="grey70")
rug(d$Tramp.lag, ticksize = 0.05, side = 1, lwd = 0.5, col=4)
} # end if statement and plot code 

# ---------------------------------------------------------------------------------------------------- #
# Table 2
# Negative Binomial Regression Robustness check with Rail Cost Shipping Variable 
# ---------------------------------------------------------------------------------------------------- #
model.max <- zelig(xmax ~ xmax.lag + RailCost.lag  + Indus.lag + state.count + dem.percent, model="negbin", data=d, robust=TRUE)

# model summaries 
summary(model.max)

# create LaTeX tables with the xtable package 
#xtable(model.max)

# ---------------------------------------------------------------------------------------------------- #
# generate quantities of interest after Model with Zelig. see King, Tomz and Wittenberg (2000)
# ---------------------------------------------------------------------------------------------------- #
max.mean <- mean(d$RailCost.lag[!is.na(d$RailCost.lag)])
max.sd.l <- mean(d$RailCost.lag[!is.na(d$RailCost.lag)]) - sd(d$RailCost.lag[!is.na(d$RailCost.lag)])
max.sd.h <- mean(d$RailCost.lag[!is.na(d$RailCost.lag)]) + sd(d$RailCost.lag[!is.na(d$RailCost.lag)])

# prediction with shipping at its mean value
out <- setx(model.max, RailCost.lag=max.mean) 
sout <- sim(model.max,x=out) 
summary(sout)

# prediction with shipping one standard deviation below its mean 
out <- setx(model.max, RailCost.lag=max.sd.l) 
sout <- sim(model.max,x=out) 
summary(sout)

# prediction with shipping one standard deviation above its mean
out <- setx(model.max, RailCost.lag=max.sd.h) 
sout <- sim(model.max,x=out) 
summary(sout)


# ---------------------------------------------------------------------------------------------------- #
# Table 4 and alternatives 
# Negative Binomial Models of distance cohort dependent variables 
# ---------------------------------------------------------------------------------------------------- #
model.outer.0500 <- zelig(outer.0500 ~ outer.0500.lag + x0500.lag + log(Tramp.lag) + Indus.lag + state.count + dem.percent, model="negbin", data=d, robust=TRUE)
model.outer.1000 <- zelig(outer.1000 ~ outer.1000.lag + x1000.lag + log(Tramp.lag) + Indus.lag + state.count + dem.percent, model="negbin", data=d, robust=TRUE)
model.outer.2000 <- zelig(outer.2000 ~ outer.2000.lag + x2000.lag + log(Tramp.lag) + Indus.lag + state.count + dem.percent, model="negbin", data=d, robust=TRUE)
model.outer.3000 <- zelig(outer.3000 ~ outer.3000.lag + x3000.lag + log(Tramp.lag) + Indus.lag + state.count + dem.percent, model="negbin", data=d, robust=TRUE)
model.outer.4000 <- zelig(outer.4000 ~ outer.4000.lag + x4000.lag + log(Tramp.lag) + Indus.lag + state.count + dem.percent, model="negbin", data=d, robust=TRUE)
model.outer.5000 <- zelig(outer.5000 ~ outer.5000.lag + x5000.lag + log(Tramp.lag) + Indus.lag + state.count + dem.percent, model="negbin", data=d, robust=TRUE)
model.outer.6000 <- zelig(outer.6000 ~ outer.6000.lag + x6000.lag + log(Tramp.lag) + Indus.lag + state.count + dem.percent, model="negbin", data=d, robust=TRUE)

summary(model.outer.0500)
summary(model.outer.1000)
summary(model.outer.2000)
summary(model.outer.3000)
summary(model.outer.4000)
summary(model.outer.5000)
summary(model.outer.6000)

# create LaTeX tables with the xtable package
#xtable(model.outer.0500)
#xtable(model.outer.1000)
#xtable(model.outer.2000)
#xtable(model.outer.3000)
#xtable(model.outer.4000)
#xtable(model.outer.5000)
#xtable(model.outer.6000)

# ---------------------------------------------------------------------------------------------------- #
# generate quantities of interest after Model with Zelig. see King, Tomz and Wittenberg (2000)
# ---------------------------------------------------------------------------------------------------- #
max.mean <- exp(mean(log(d$Tramp.lag[!is.na(d$Tramp.lag)])))
max.sd.l <- exp(mean(log(d$Tramp.lag[!is.na(d$Tramp.lag)])) - sd(log(d$Tramp.lag[!is.na(d$Tramp.lag)])))
max.sd.h <- exp(mean(log(d$Tramp.lag[!is.na(d$Tramp.lag)])) + sd(log(d$Tramp.lag[!is.na(d$Tramp.lag)])))

# prediction with shipping at its mean value
out <- setx(model.outer.2000, Tramp.lag=max.mean) 
sout <- sim(model.outer.2000,x=out) 
summary(sout)

# prediction with shipping one standard deviation below its mean 
out <- setx(model.outer.2000, Tramp.lag=max.sd.l) 
sout <- sim(model.outer.2000,x=out) 
summary(sout)

# prediction with shipping one standard deviation above its mean
out <- setx(model.outer.2000, Tramp.lag=max.sd.h) 
sout <- sim(model.outer.2000,x=out) 
summary(sout)


# ---------------------------------------------------------------------------------------------------- #
# Table 5 and alternatives 
# ---------------------------------------------------------------------------------------------------- #
model.outer.0500 <- zelig(outer.0500 ~ outer.0500.lag + x0500.lag + RailCost.lag + Indus.lag + state.count + dem.percent, model="negbin", data=d, robust=TRUE)
model.outer.1000 <- zelig(outer.1000 ~ outer.1000.lag + x1000.lag + RailCost.lag + Indus.lag + state.count + dem.percent, model="negbin", data=d, robust=TRUE)
model.outer.2000 <- zelig(outer.2000 ~ outer.2000.lag + x2000.lag + RailCost.lag + Indus.lag + state.count + dem.percent, model="negbin", data=d, robust=TRUE)
model.outer.3000 <- zelig(outer.3000 ~ outer.3000.lag + x3000.lag + RailCost.lag + Indus.lag + state.count + dem.percent, model="negbin", data=d, robust=TRUE)
model.outer.4000 <- zelig(outer.4000 ~ outer.4000.lag + x4000.lag + RailCost.lag + Indus.lag + state.count + dem.percent, model="negbin", data=d, robust=TRUE)
model.outer.5000 <- zelig(outer.5000 ~ outer.5000.lag + x5000.lag + RailCost.lag + Indus.lag + state.count + dem.percent, model="negbin", data=d, robust=TRUE)
model.outer.6000 <- zelig(outer.6000 ~ outer.6000.lag + x6000.lag + RailCost.lag + Indus.lag + state.count + dem.percent, model="negbin", data=d, robust=TRUE)

summary(model.outer.0500)
summary(model.outer.1000)
summary(model.outer.2000)
summary(model.outer.3000)
summary(model.outer.4000)
summary(model.outer.5000)
summary(model.outer.6000)

# create LaTeX tables with the xtable package
#xtable(model.outer.0500)
#xtable(model.outer.1000)
#xtable(model.outer.2000)
#xtable(model.outer.3000)
#xtable(model.outer.4000)
#xtable(model.outer.5000)
#xtable(model.outer.6000)

# ---------------------------------------------------------------------------------------------------- #
# generate quantities of interest after Model with Zelig. see King, Tomz and Wittenberg (2000)
# ---------------------------------------------------------------------------------------------------- #
max.mean <- mean(d$RailCost.lag[!is.na(d$RailCost.lag)])
max.sd.l <- mean(d$RailCost.lag[!is.na(d$RailCost.lag)]) - sd(d$RailCost.lag[!is.na(d$RailCost.lag)])
max.sd.h <- mean(d$RailCost.lag[!is.na(d$RailCost.lag)]) + sd(d$RailCost.lag[!is.na(d$RailCost.lag)])

# prediction with shipping at its mean value
out <- setx(model.outer.2000, RailCost.lag=max.mean) 
sout <- sim(model.outer.2000,x=out) 
summary(sout)

# prediction with shipping one standard deviation below its mean 
out <- setx(model.outer.2000, RailCost.lag=max.sd.l) 
sout <- sim(model.outer.2000,x=out) 
summary(sout)

# prediction with shipping one standard deviation above its mean
out <- setx(model.outer.2000, RailCost.lag=max.sd.h) 
sout <- sim(model.outer.2000,x=out) 
summary(sout)


# ---------------------------------------------------------------------------------------------------- #
# End of File
# ---------------------------------------------------------------------------------------------------- #
